"""Build per-topic example phrase lists from LDA phrase/topic score files.

Each input file holds tab-separated rows ``topicid<TAB>phrase<TAB>score``.
A phrase may be listed under several topics; it is kept only under the
topic where its score is highest.  The output lists topics in ascending
id order and, within a topic, phrases by descending score.  Rows whose
phrase contains an underscore are written with one leading tab.
"""


def load_topic_phrase_scores(path):
    """Read ``(topicid, phrase, score)`` tuples from a tab-separated file.

    Completely empty lines (e.g. a trailing newline at end of file) are
    skipped.  Any other malformed row still raises ``ValueError`` or
    ``IndexError`` so that corrupt input is not silently ignored.
    """
    rows = []
    # ``with`` guarantees the handle is closed even when a row fails to parse.
    with open(path, 'r') as fr:
        for line in fr:
            line = line.strip('\r\n')
            if not line:
                continue
            arr = line.split('\t')
            rows.append((int(arr[0]), arr[1], float(arr[2])))
    return rows


def best_topic_per_phrase(rows):
    """Map each phrase to the ``(topicid, score)`` pair with its top score.

    ``sorted`` is stable, so when a phrase has equal scores under several
    topics the row that appears first in ``rows`` wins.
    """
    phrase2topicidscore = {}
    for topicid, phrase, score in sorted(rows, key=lambda x: -x[2]):
        if phrase not in phrase2topicidscore:
            phrase2topicidscore[phrase] = (topicid, score)
    return phrase2topicidscore


def group_by_topic(phrase2topicidscore):
    """Invert the phrase map into ``{topicid: [(phrase, score), ...]}``."""
    topicid2phrasescore = {}
    for phrase, (topicid, score) in phrase2topicidscore.items():
        topicid2phrasescore.setdefault(topicid, []).append((phrase, score))
    return topicid2phrasescore


def write_examples(path, topicid2phrasescore):
    """Write ``topicid<TAB>phrase<TAB>score`` lines to ``path``.

    Topics are emitted in ascending id order and phrases by descending
    score.  Lines whose phrase contains ``'_'`` get one leading tab.
    """
    with open(path, 'w') as fw:
        for topicid in sorted(topicid2phrasescore):
            ranked = sorted(topicid2phrasescore[topicid], key=lambda x: -x[1])
            for phrase, score in ranked:
                s = str(topicid) + '\t' + phrase + '\t' + str(score)
                if '_' in phrase:
                    s = '\t' + s
                fw.write(s + '\n')


def build_examples(src_path, dst_path):
    """Run the full read -> dedupe -> group -> write pipeline for one file."""
    rows = load_topic_phrase_scores(src_path)
    # The output file is only created after the input was read successfully.
    write_examples(dst_path, group_by_topic(best_topic_per_phrase(rows)))


def main():
    """Generate the example files for the post and response topic models."""
    build_examples('lda_post_5_phrase_topic_tokens.txt', 'examples_post.txt')
    build_examples('lda_response_10_phrase_topic_tokens.txt',
                   'examples_response.txt')


if __name__ == '__main__':
    main()

